home *** CD-ROM | disk | FTP | other *** search
- /*
- * This is a **very** simple text compression program which performs
- * a "Huffman" encoding of the most common characters in the input file.
- * It is designed to operate as a "unix" style filter, accepting input
- * from "stdin" and writing to "stdout".
- *
- * Syntax: HFTEXT (Encode | Decode) <input_file >output_file
- *
- * First, the input file is scanned, and a table built up containing
- * the most common characters (highest frequency at the beginning of
- * the table). Then the file is re-read, and any characters which are
- * in the table are replaced with a series of one bits equal in number
- * to its position in the table, followed by a zero bit. Thus, the
- * highest frequency character is encoded into two bits, the second
- * highest is encoded into three bits etc...
- *
- * Characters not occurring in the table are written with a zero bit,
- * followed by the 7 bits of the ASCII character value.
- *
- * Note that this scheme works only on ASCII text files, and becomes *very*
- * confused if the original file contains characters with the high bit set.
- *
- * Compile command: cc hftext -fop
- */
- #include <stdio.h>
- #include <file.h>
- #define TSIZE 7 /* Size of common character table */
-
- unsigned ftable[256] = 0, ocount = 0; /* ftable: occurrence count for each character value; ocount: number of bits currently held in obyte */
- unsigned char ctable[TSIZE] = 0, obyte = 0; /* ctable: common character table stored at the start of the compressed file; obyte: byte being assembled (encode) or consumed (decode) one bit at a time */
-
- main(argc, argv)
- int argc;
- char *argv[];
- {
- int i, j, k;
-
- stdin = setbuf(stdin, 1000);
- stdout = setbuf(stdout, 1000);
-
- /* Use MICRO-C's more powerful '&&' to force a zero if !enough args */
- switch((argc > 1) && toupper(*argv[1])) {
- case 'E' : /* Encode the file */
- *(char*)stdout |= F_BINARY; /* Convert stdout to BINARY */
- while((i = getc(stdin)) != EOF)
- ++ftable[i];
- rewind(stdin);
-
- /* Build table of most frequent characters */
- for(i=0; i < TSIZE; ++i) {
- k = 0;
- for(j=1; j < 256; ++j)
- if(ftable[j] > ftable[k])
- k = j;
- ctable[i] = k;
- ftable[k] = 0; }
-
- /* Write the index table */
- fwrite(ctable, TSIZE, stdout);
-
- /* Process the file */
- while((i = getc(stdin)) != EOF) {
- for(j=0; j < TSIZE; ++j) {
- if(ctable[j] == i)
- break; }
- if(j < TSIZE) { /* Write a token */
- do
- write_bit(1);
- while(j--);
- write_bit(0); }
- else { /* Write the character */
- write_bit(0);
- for(k=0; k < 7; ++k) {
- write_bit(i & 0x01);
- i >>= 1; } } }
-
- /* Clean up output bits */
- while(obyte)
- write_bit(0);
- break;
- case 'D' : /* Decode the file */
- *(char*)stdin |= F_BINARY; /* Convert stdin to BINARY */
- fread(ctable, TSIZE, stdin);
- while((i = read_bit()) != EOF) {
- j = 0;
- if(i) { /* token */
- while((k = read_bit()) && (k != EOF))
- ++j;
- j = ctable[j]; }
- else { /* Normal character */
- for(k=0; k < 7; ++k)
- j = (j >> 1) | read_bit();
- j >>= 1; }
- putc(j, stdout); }
- break;
- default:
- abort("Use: HFTEXT E|D <input_file >output_file"); }
-
- fflush(stdout);
- }
-
- /*
- * Write a single bit to the output file
- */
- write_bit(value)
- int value;
- {
- obyte = (obyte << 1) | value;
- if(++ocount > 7) {
- putc(obyte, stdout);
- ocount = obyte = 0; }
- }
-
- /*
- * Read a single bit from the input file
- */
- read_bit()
- {
- int i;
-
- if(!ocount) {
- if((obyte = getc(stdin)) == EOF)
- return EOF;
- ocount = 8; }
- i = obyte;
- obyte <<= 1;
- --ocount;
- return i & 0x80;
- }
-